*** This file runs regressions to generate estimates of average effects across different specifications
*** Here we use the CEP policy categories and 2015-2019 as the post period
*** Keep switcher states. Least-Skilled Treated Group
*** Only specifications with no time-varying covariates.
*** Testing Bootstrapping absorbing all dummies





* ---------------------------------------------------------------------------
* Session setup: close any log left open, clear memory, define the directory
* globals used throughout the file, open this run's log, and fix the seed so
* the bootstrap resampling below is reproducible.
* ---------------------------------------------------------------------------
capture log close 
clear all
set more off
set trace off

* If needed change global path to point to directory where files are stored on your computer
*global path "I:/DataSets5/Duncan/Dropbox/Recent Minimum Wage Changes/2020.12 NBER Update/JOLE Precommittment Replication"

* The line above is commented out, so path must be defined before this file runs
* (for example by a calling do-file). Stop early with a clear message if it is not;
* otherwise every directory global below would resolve to "/Data", "/Tables", and so on.
if `"$path"' == "" {
	display as error "global path is not defined; set it to the replication root directory before running this do-file"
	exit 198
}

global dtadir "$path/Data"
global tabdir "$path/Tables"
global figdir "$path/Figures"
global estdir "$path/Estimates"
global logdir "$path/Logfiles"


log using "$logdir/boot-emp-cep-lowskill-switchers-2015-2019-no-covars.log", replace


*Set seed
set seed 123456

* 2) Define bootstrap program for stratified sampling
*
* bs_strat is the command handed to the bootstrap prefix further down. On the data
* currently in memory it runs eight employment regressions with reghdfe: for each of
* the ACS and CPS subsamples, a difference-in-differences (DD) specification on the
* lowskill group and a triple-difference (DDD) specification, each with and without
* age and education fixed effects. It then averages the coefficients for the three
* policy groups (large statutory increasers, small statutory increasers, indexers).
*
* Returned in r():
*   mean_emp_beta_SRC_GRP  for SRC in all, acs, cps  and  GRP in all, large, small, indexer
*
* Requirements visible in this file:
*   - the variable stateid must exist; the bootstrap call creates it via idcluster(stateid)
*   - the user-written command reghdfe must be installed
*
* The four options are required by the syntax statement but are not referenced in
* the program body; they only label the call.
*
* Implementation note: coefficients are accumulated in a small matrix instead of being
* written to dataset-length variables. This avoids creating 36 variables (and a
* preserve/restore) on the full dataset in every replication and keeps the estimates in
* double precision, so returned values can differ from a float-variable implementation
* only at float rounding level.
*
* Original author's note on the resampling design: 51 clusters are sampled, split into
* 25 no changer states, 9 indexer states, 9 small increaser states and 8 large
* increaser states.
capture program drop bs_strat
program define bs_strat, rclass

	*------------------------ 2.1 Syntax -------------------------- 

	syntax, postmin(real) categories(string) sample(string) switchers(string)

	
	*------------------------ 2.2 Set Up -------------------------- 

	* Right-hand sides and the coefficients of interest, listed in the order large, small, indexer
	local dd_rhs   "i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post"
	local dd_coef  "1.StatIncreaserLarge#1.post 1.StatIncreaserSmall#1.post 1.indexer#1.post"
	local ddd_rhs  "i.Affect_StatLg_Post i.Affect_StatSm_Post i.Affect_Index_Post"
	local ddd_coef "1.Affect_StatLg_Post 1.Affect_StatSm_Post 1.Affect_Index_Post"

	* Options shared by every regression
	local regopts "cluster(stateid) compact poolsize(2000)"

	* Number of specifications per data source: DD and DDD, each with and without age and educ effects
	local nspec = 4

	* Running coefficient sums. Rows: 1 = ACS, 2 = CPS. Columns: 1 = large, 2 = small, 3 = indexer
	tempname S
	matrix `S' = J(2, 3, 0)

	
	*------------------------ 2.3 DD and DDD Regression --------------------------  	

	local row = 0
	foreach src in acs cps {
		local ++row

		* The ACS is annual; the CPS is monthly, so its time effects are month by year
		if "`src'" == "acs" {
			local dd_time  "i.year"
			local ddd_time "i.affected##i.year"
		}
		else {
			local dd_time  "i.month##i.year"
			local ddd_time "i.affected##i.time"
		}

		foreach design in dd ddd {
			forvalues demog = 0/1 {

				* Second pass of each design adds age and education fixed effects
				local demogfe ""
				if `demog' local demogfe "i.age i.educ"

				if "`design'" == "dd" {
					reghdfe employed `dd_rhs' [aw=perwt] if lowskill == 1 & `src' == 1, absorb(`dd_time' i.stateid `demogfe') `regopts'
				}
				else {
					reghdfe employed `ddd_rhs' [aw=perwt] if `src' == 1, absorb(i.stateid##i.time `ddd_time' i.affected##i.stateid `demogfe') `regopts'
				}

				* Add this specification's three coefficients to the running sums
				local col = 0
				foreach coef of local `design'_coef {
					local ++col
					matrix `S'[`row', `col'] = `S'[`row', `col'] + _b[`coef']
				}
			}
		}
	}

	
	*------------------------ 2.4 Averages returned to bootstrap --------------------------

	* Within each data source
	local row = 0
	foreach src in acs cps {
		local ++row
		return scalar mean_emp_beta_`src'_all     = (`S'[`row', 1] + `S'[`row', 2] + `S'[`row', 3]) / (3 * `nspec')
		return scalar mean_emp_beta_`src'_large   = `S'[`row', 1] / `nspec'
		return scalar mean_emp_beta_`src'_small   = `S'[`row', 2] / `nspec'
		return scalar mean_emp_beta_`src'_indexer = `S'[`row', 3] / `nspec'
	}

	* Pooled over ACS and CPS
	return scalar mean_emp_beta_all_all     = (`S'[1, 1] + `S'[1, 2] + `S'[1, 3] + `S'[2, 1] + `S'[2, 2] + `S'[2, 3]) / (6 * `nspec')
	return scalar mean_emp_beta_all_large   = (`S'[1, 1] + `S'[2, 1]) / (2 * `nspec')
	return scalar mean_emp_beta_all_small   = (`S'[1, 2] + `S'[2, 2]) / (2 * `nspec')
	return scalar mean_emp_beta_all_indexer = (`S'[1, 3] + `S'[2, 3]) / (2 * `nspec')

end
				   


*****************************************************************************************************
************					   3. Set up data							             ************
***************************************************************************************************** 


* Scratch file names. The acs tempfile holds the prepared ACS extract until it is
* appended to the CPS extract; the cps name is declared alongside it.
tempfile acs cps

*** Load the ACS extract and keep survey years from 2011 onward
use "$dtadir/ACS-2019.dta", clear
keep if year >= 2011

*** Keep ages 16 to 64 and drop records with empstat equal to 0
keep if age >= 16 & age < 65
keep if empstat != 0

*** Outcome: indicator for empstat equal to 1
gen employed = (empstat == 1)

**** Education indicators built from the ACS educ codes
gen dropout     = (educ < 6)
gen highschool  = (educ == 6)
gen somecollege = (educ >= 7 & educ < 10)
gen collegeplus = (educ >= 10)

*** State-by-year house price index; rows found only in the HPI file are discarded
merge m:1 statefip year using "$dtadir/HPI_acs_2019.dta", keep(master match) nogenerate
replace HPI = HPI/1000

*** State-by-year personal income; rows found only in the income file are discarded
merge m:1 statefip year using "$dtadir/PersonalIncome_acs_2019.dta", keep(master match) nogenerate
gen lnPersonalIncome = ln(PersonalIncome)

* The ACS is annual, so the time index is simply the survey year
gen time = year

** Mid-skill comparison group: high school graduates aged 22 to 30, or dropouts aged 31 to 64
gen group = (age > 21 & age <= 30 & highschool == 1) | (age > 30 & age < 65 & dropout == 1)

* Employment rate of that group by state and year, copied onto every row of the cell
egen stateempD = mean(employed) if group == 1, by(year statefip) 
egen stateempE = max(stateempD), by(year statefip) 

* Sample indicators
gen lowskill = (inrange(age,16,25) & dropout == 1)
gen young    = inrange(age,16,21)
gen primeage = inrange(age,26,54)

* Only the least-skilled group and the prime-age group are used downstream
keep if lowskill == 1 | primeage == 1

* Source flags used to separate the surveys after they are stacked
gen acs = 1
gen cps = 0

keep employed year statefip lnPersonalIncome HPI educ age stateempE lowskill primeage acs cps perwt time

compress

save `acs', replace



*** Load the basic monthly CPS extract and keep survey years from 2011 onward
use "$dtadir/CPS-2019.dta", clear
keep if year >= 2011

*** Keep ages 16 to 64 and drop records with empstat equal to 0
keep if age >= 16 & age < 65
keep if empstat != 0

*** Outcome: employed when
*     empstat = 10  "At work"
*     empstat = 12  "employed, not at work last week"
*     empstat = 1   armed forces, assumed to be employed
gen employed = inlist(empstat, 1, 10, 12)

**** Education indicators built from the CPS educ codes
gen dropout     = (educ < 73)
gen highschool  = (educ == 73)
gen somecollege = (educ >= 81 & educ <= 92)
gen collegeplus = (educ >= 111)

* Calendar quarter of the survey month; left missing unless month is one of 1 through 12
gen quarter = ceil(month/3) if inlist(month, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

* Monthly time index in YYYYMM form
gen time = (100*year) + month

*** State-by-quarter house price index; rows found only in the HPI file are discarded
merge m:1 statefip year quarter using "$dtadir/HPI_2019.dta", keep(master match) nogenerate
replace HPI = HPI/1000

*** State-by-quarter personal income; rows found only in the income file are discarded
merge m:1 statefip year quarter using "$dtadir/PersonalIncome_2019.dta", keep(master match) nogenerate
gen lnPersonalIncome = ln(PersonalIncome)

** Mid-skill comparison group: high school graduates aged 22 to 30, or dropouts aged 31 to 64
gen group = (age > 21 & age <= 30 & highschool == 1) | (age > 30 & age < 65 & dropout == 1)

* Employment rate of that group by state and month, copied onto every row of the cell
egen stateempD = mean(employed) if group == 1, by(year month statefip) 
egen stateempE = max(stateempD), by(year month statefip) 

* Sample indicators
gen lowskill = (inrange(age,16,25) & dropout == 1)
gen young    = inrange(age,16,21)
gen primeage = inrange(age,26,54)

* Only the least-skilled group and the prime-age group are used downstream
keep if lowskill == 1 | primeage == 1

* Source flags used to separate the surveys after they are stacked
gen acs = 0
gen cps = 1

* Give the CPS weight the same name as the ACS person weight
gen perwt = wtfinl

keep employed year month statefip lnPersonalIncome HPI time educ age stateempE lowskill primeage acs cps perwt

compress

* Stack the prepared ACS extract underneath the CPS observations
append using `acs'

* generate post variable: 0 for 2011-2013, 1 for 2015-2019, missing otherwise (2014)
cap drop post 
gen post = 0 if inrange(year,2011,2013) 
replace post = 1 if inrange(year,2015,2019)

* Keep only needed observations to reduce memory
drop if missing(post)
keep if (lowskill == 1 | primeage == 1)


* merge in policy categories 
* keep(master match) discards states that appear only in the policy file, in line with
* the earlier merges in this file. Without it such states would enter as rows with no
* survey data and could become extra clusters in the stratified bootstrap.
cap drop originaltype-increase5 
merge m:1 statefip using "$dtadir/min_wage_variables_for_ACS_and_CPS_analysis.dta", nogen keep(master match) keepusing(originaltype jan*min) 

* Report survey observations that have no minimum wage information. With the guards
* below they are not flagged as statutory increasers.
quietly count if missing(jan2013min, jan2015min, jan2016min)
if r(N) > 0 display as error "warning: `r(N)' observations have missing minimum wage data and are not classified as statutory increasers"

cap drop indexer StatIncreaserLarge StatIncreaserSmall statutoryincreasein2014or2015 statutoryincreasein2014to2017 statutoryincreasein2014to2018

gen indexer = 0 
gen StatIncreaserLarge = 0 
gen StatIncreaserSmall = 0 
gen statutoryincreasein2014or2015 = 0
gen statutoryincreasein2014to2018 = 0

* CEP Categories
* Stata treats a missing value as larger than any number, so a difference involving a
* missing wage would satisfy "> 0" and ">= 1". The !missing() conditions make the
* comparisons apply only when the wages are observed; with complete policy data the
* classification is unchanged.
replace indexer = 1 if originaltype == "Indexer" 
replace statutoryincreasein2014or2015 = 1 if (jan2016min - jan2013min) > 0 & !missing(jan2013min, jan2016min) & indexer == 0 
replace StatIncreaserLarge = 1 if indexer == 0 & (jan2015min - jan2013min) >= 1 & !missing(jan2013min, jan2015min, jan2016min) 
replace StatIncreaserSmall = 1 if indexer == 0 & statutoryincreasein2014or2015 == 1 & StatIncreaserLarge == 0 

* Generate variables for DDD regressions
* affected = 1 for the least-skilled group, 0 for the prime-age comparison group
gen affected = .
replace affected = 1 if lowskill == 1
replace affected = 0 if primeage == 1

gen Affect_Index_Post = affected*indexer*post
gen Affect_StatLg_Post = affected*StatIncreaserLarge*post
gen Affect_StatSm_Post = affected*StatIncreaserSmall*post


* Generate policygroup variable for doing proportional sampling correctly
* 1 = none of the categories below, 2 = indexer, 3 = small increaser, 4 = large increaser
gen policygroup = 1
replace policygroup = 2 if indexer == 1
replace policygroup = 3 if StatIncreaserSmall == 1
replace policygroup = 4 if StatIncreaserLarge == 1

compress

* Work inside the output directory: the bootstrap prefix writes its replication file
* with a relative name, and that file is reloaded from the same place further down.
cd "$estdir/Bootstrap/no-tvc"

timer clear
timer on 1

* Run bootstrap command for stratified sampling for all means, ACS means, and CPS means for all changer states, large increasers, small increasers, and indexers.
* States (statefip) are resampled within policygroup strata; idcluster() creates stateid,
* a unique id for each resampled cluster, which bs_strat uses for fixed effects and clustering.

* Policy categories: cep. Sample: lowskill. Switcher states: switchers. Post start: 2015 post end: 2019.
bootstrap emp_beta_all_allchange=r(mean_emp_beta_all_all) emp_beta_all_large=r(mean_emp_beta_all_large) emp_beta_all_small=r(mean_emp_beta_all_small) emp_beta_all_index=r(mean_emp_beta_all_indexer) ///
emp_beta_acs_allchange=r(mean_emp_beta_acs_all) emp_beta_acs_large=r(mean_emp_beta_acs_large) emp_beta_acs_small=r(mean_emp_beta_acs_small) emp_beta_acs_index=r(mean_emp_beta_acs_indexer) ///
emp_beta_cps_allchange=r(mean_emp_beta_cps_all) emp_beta_cps_large=r(mean_emp_beta_cps_large) emp_beta_cps_small=r(mean_emp_beta_cps_small) emp_beta_cps_index=r(mean_emp_beta_cps_indexer), ///
rep(100) noisily cluster(statefip) strata(policygroup) idcluster(stateid) saving("boot-emp-cep-lowskill-switchers-2015-2019", replace): bs_strat, postmin(2015) categories(cep) sample(lowskill) switchers(switchers)

* Save data from original call with data as is.
* Copy the results table to a matrix first, then clear the analysis data so that svmat
* builds a small dataset with one row per statistic instead of adding columns to the
* full sample. The clear command leaves matrices in memory.
mat res = r(table) 
clear
svmat res, names(col)

* Label the first six rows of the table; any further rows are dropped below
gen stat = ""
replace stat = "beta" in 1
replace stat = "boot_se" in 2
replace stat = "z-score" in 3
replace stat = "pval" in 4
replace stat = "lo_ci" in 5
replace stat = "hi_ci" in 6

* Add labels to bootstrap code sample
cap drop policycat sample switchers postmin postmax
gen policycat = "cep"
gen sample = "lowskill"
gen switchers = "switchers"
gen postmin = 2015
gen postmax = 2019

drop if missing(stat)

save "$estdir/Bootstrap/no-tvc/coef-emp-cep-lowskill-switchers-2015-2019.dta", replace


*  Add labels to bootstrap code sample
* Reload the replication-level file written by saving() above; one row per replication
use "boot-emp-cep-lowskill-switchers-2015-2019.dta", clear
cap drop policycat sample switchers postmin postmax
gen policycat = "cep"
gen sample = "lowskill"
gen switchers = "switchers"
gen postmin = 2015
gen postmax = 2019
gen iteration = _n

label var policycat "CEP or New Policy Categories"
label var sample "Least-Skilled or Young Sample"
label var switchers "Switchers or No Switchers"
label var postmin "First Year of Post Period"
label var postmax "Last Year Post Period"
label var iteration "Bootstrap Iteration Number"

save "$estdir/Bootstrap/no-tvc/boot-emp-cep-lowskill-switchers-2015-2019.dta", replace

di "Policy categories: cep. Sample: lowskill. Switcher states: switchers. Post start: 2015 post end: 2019. Done" 


timer off 1
timer list 1
log close

